package com.hlmc.spider;

import com.hlmc.config.Constants;
import org.springframework.stereotype.Service;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;

/**
 * WebMagic page processor for the movie section of www.360kan.com.
 *
 * <p>On every page it queues the links found in {@code a.js-tongjip}. On list pages
 * (URLs containing {@link #LIST_URL_PREFIX}) it additionally queues the last pagination link
 * and the tab links. On detail pages (URLs matching {@link #DETAIL_URL_REGEX}) it extracts
 * the movie fields and stores them in the page's result items; a detail page without a
 * title or image is marked as skipped.
 */
@Service("dy_360spider")
public class DY360_dt_spider extends BaseProcessor {

    /** Substring identifying a movie list page. */
    private static final String LIST_URL_PREFIX = "https://www.360kan.com/dianying/list.php?cat=";

    /** Pattern of a movie detail page URL. */
    private static final String DETAIL_URL_REGEX = "https://www.360kan.com/m/\\w{14}\\.html";

    /** Root of the header area of a detail page. */
    private static final String TOP_MAIN = "//div[@class='c-top-main']/div";

    /** Info column inside the header area. */
    private static final String TOP_INFO = TOP_MAIN + "/div[2]/div";

    /** Container holding the descriptive paragraphs (type, year, area, ...). */
    private static final String DESC = "//div[@id='js-desc-switch']";

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100).setCharset("utf-8");

    /**
     * Queues follow-up links and, for detail pages, extracts the movie fields.
     *
     * @param page the downloaded page to process
     */
    @Override
    public void process(Page page) {
        page.addTargetRequests(page.getHtml().xpath("//a[@class='js-tongjip']/@href").all());

        if (page.getUrl().toString().contains(LIST_URL_PREFIX)) {
            page.addTargetRequests(page.getHtml().css("#js-ew-page a:last-child", "href").all());
            page.addTargetRequests(
                    page.getHtml().xpath("//div[@class='s-tab-main']/ul/li/a/@href").all());
        }

        if (page.getUrl().regex(DETAIL_URL_REGEX).match()) {
            extractDetail(page);
        }
    }

    /**
     * Extracts the movie fields of a detail page into the result items, or marks the page as
     * skipped when the title or image is missing.
     */
    private void extractDetail(Page page) {
        // A selector that matches nothing yields null from toString(); guard before
        // calling String methods so a partially rendered page cannot abort processing.
        String rawTitle =
                page.getHtml().xpath("//div[@class='title-left g-clear']/h1/text()").toString();
        String title = rawTitle == null ? "" : rawTitle.replace(" ", "");
        String image = page.getHtml().xpath(TOP_MAIN + "/div[1]/div/a/img/@src").toString();
        // Score is stored as extracted and may be null when the node is absent.
        String score = page.getHtml().xpath(TOP_INFO + "/div[1]/div/span[1]/text()").toString();

        String type = allText(page, DESC + "/div[1]/p[1]/a/text()");
        String year = firstText(page, DESC + "/div[1]/p[2]/text()");
        String area = firstText(page, DESC + "/div[1]/p[3]/text()");
        String director = firstText(page, DESC + "/div[1]/p[5]/a/text()");
        String actor = allText(page, DESC + "/div[1]/p[6]/a/text()");
        String detail = firstText(page, DESC + "/div[3]/p/text()");
        String playerUrl = allText(page, "//div[@id='js-site-wrap']/a[1]/@href");

        System.out.println(title);

        boolean hasRequiredFields = image != null && !title.isEmpty() && !image.isEmpty();
        if (hasRequiredFields) {
            page.putField("title", title);
            page.putField("image", image);
            page.putField("score", score);
            page.putField("type", type);
            page.putField("year", year);
            page.putField("area", area);
            page.putField("director", director);
            page.putField("actor", actor);
            page.putField("detail", detail);
            page.putField("purl", playerUrl);
            page.putField("category", Constants.CATEGORY_0);
        } else {
            // Without a title and image there is nothing worth handing to the pipelines.
            page.setSkip(true);
        }
    }

    /**
     * Returns the first match of {@code xpath} with square brackets removed, or an empty
     * string when nothing matches.
     */
    private static String firstText(Page page, String xpath) {
        String value = page.getHtml().xpath(xpath).toString();
        return value == null ? "" : stripBrackets(value);
    }

    /**
     * Returns all matches of {@code xpath} rendered via {@code List.toString()} with the
     * square brackets removed (i.e. joined by {@code ", "}); empty when nothing matches.
     */
    private static String allText(Page page, String xpath) {
        return stripBrackets(page.getHtml().xpath(xpath).all().toString());
    }

    /** Removes every {@code [} and {@code ]} character from {@code text}. */
    private static String stripBrackets(String text) {
        return text.replace("[", "").replace("]", "");
    }

    /**
     * @return the crawl settings used by this processor (3 retries, 100 ms sleep, UTF-8)
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Runs the spider standalone with 5 threads, starting from the unfiltered movie list.
     * No pipeline is registered here.
     */
    public static void main(String[] args) {
        Spider.create(new DY360_dt_spider())
                .addUrl("https://www.360kan.com/dianying/list.php?cat=all&area=all&act=all&year=all")
                .thread(5)
                .run();
    }

}
